-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[VPlan] Make CanIV part of region. #144803
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
2718ea5
to
5c5cada
Compare
You can test this locally with the following command:git-clang-format --diff origin/main HEAD --extensions cpp,h -- llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h llvm/lib/Transforms/Vectorize/LoopVectorize.cpp llvm/lib/Transforms/Vectorize/VPlan.cpp llvm/lib/Transforms/Vectorize/VPlan.h llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp llvm/lib/Transforms/Vectorize/VPlanUnroll.cpp llvm/lib/Transforms/Vectorize/VPlanUtils.cpp llvm/lib/Transforms/Vectorize/VPlanValue.h llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp llvm/unittests/Transforms/Vectorize/VPlanHCFGTest.cpp llvm/unittests/Transforms/Vectorize/VPlanPatternMatchTest.cpp llvm/unittests/Transforms/Vectorize/VPlanVerifierTest.cpp
View the diff from clang-format here.diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index bca278cbe..5af1a6eea 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -9416,7 +9416,9 @@ static bool processLoopInVPlanNativePath(
VF.MinProfitableTripCount);
// Checks are the same for all VPlans, added to Plan only for
// compactness.
- LVP.attachRuntimeChecks(BestPlan, Checks, hasBranchWeightMD(*L->getLoopLatch()->getTerminator()));
+ LVP.attachRuntimeChecks(
+ BestPlan, Checks,
+ hasBranchWeightMD(*L->getLoopLatch()->getTerminator()));
LVP.prepareToExecute(BestPlan, VF.Width, /*UF=*/1, Checks, false);
LVP.executePlan(VF.Width, /*UF=*/1, BestPlan, LB, DT, false);
}
@@ -9750,8 +9752,7 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
}) &&
"the canonical IV should only be used by its increment or "
"ScalarIVSteps when resetting the start value");
- IV->setOperand(0, VPV);
-
+ IV->setOperand(0, VPV);
// Ensure that the start values for all header phi recipes are updated before
// vectorizing the epilogue loop.
@@ -10217,7 +10218,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
VF.MinProfitableTripCount);
- LVP.attachRuntimeChecks(BestPlan, Checks, hasBranchWeightMD(*L->getLoopLatch()->getTerminator()));
+ LVP.attachRuntimeChecks(
+ BestPlan, Checks,
+ hasBranchWeightMD(*L->getLoopLatch()->getTerminator()));
LVP.prepareToExecute(BestPlan, VF.Width, IC, Checks, false);
LVP.executePlan(VF.Width, IC, BestPlan, Unroller, DT, false);
ORE->emit([&]() {
@@ -10247,7 +10250,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
EpilogueVectorizerMainLoop MainILV(L, PSE, LI, DT, TTI, AC, EPI, &CM,
BFI, PSI, Checks, *BestMainPlan);
- LVP.attachRuntimeChecks(*BestMainPlan, Checks, hasBranchWeightMD(*L->getLoopLatch()->getTerminator()));
+ LVP.attachRuntimeChecks(
+ *BestMainPlan, Checks,
+ hasBranchWeightMD(*L->getLoopLatch()->getTerminator()));
auto ExpandedSCEVs = LVP.prepareToExecute(
*BestMainPlan, EPI.MainLoopVF, EPI.MainLoopUF, Checks, false);
LVP.executePlan(EPI.MainLoopVF, EPI.MainLoopUF, *BestMainPlan, MainILV,
@@ -10293,7 +10298,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
VF.Width, IC, PSE);
LVP.addMinimumIterationCheck(BestPlan, VF.Width, IC,
VF.MinProfitableTripCount);
- LVP.attachRuntimeChecks(BestPlan, Checks, hasBranchWeightMD(*L->getLoopLatch()->getTerminator()));
+ LVP.attachRuntimeChecks(
+ BestPlan, Checks,
+ hasBranchWeightMD(*L->getLoopLatch()->getTerminator()));
LVP.prepareToExecute(BestPlan, VF.Width, IC, Checks, false);
LVP.executePlan(VF.Width, IC, BestPlan, LB, DT, false);
++LoopsVectorized;
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice! Initial review after taking a first look.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
/// Returns the canonical induction recipe of the vector loop. | |
/// Returns the canonical induction VPValue of the vector loop. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
if (!isa<VPCanonicalIVPHIRecipe>(&*Entry->begin())) { | |
errs() << "VPlan vector loop header does not start with a " | |
"VPCanonicalIVPHIRecipe\n"; | |
if (!Plan.getCanonicalIV()) { | |
errs() << "VPlan vector loop lacks a canonical IV\n"; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
// When vectorizing the epilogue loop, the canonical induction start | |
// value needs to be changed from zero to the value after the main | |
// vector loop. Find the resume value created during execution of the main | |
// VPlan. | |
// FIXME: Improve modeling for canonical IV start values in the epilogue | |
// loop. | |
// When vectorizing the epilogue loop, the canonical induction start value | |
// needs to be changed from zero to the value after the main vector loop. Find | |
// the resume value created during execution of the main VPlan. | |
// FIXME: Improve modeling for canonical IV start values in the epilogue loop. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CanIVInc = cast<VPInstruction>(U); | |
assert(!CanIVInc && "Expecting at most one canonical IV increment"); | |
CanIVInc = cast<VPInstruction>(U); |
?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This Count might not be an add of canIV with VFxUF?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfotunately yes, after narrowInterleaveGroups it will be 1. I updated the code to check for users and consider both cases (increment by UFxVF and used in branchoncond)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
CanonicalIV is never expected to sign wrap? (CanonicalIVIncrement->hasNoSignedWrap replaced with false)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, currently sign-wrap was never set.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Place before VPDef to try and maintain lex order?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ditto
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
VPCanonicalIVSC, /// A VPValue sub-class that is a VPRecipeBase. | |
VPCanonicalIVSC, /// A VPValue sub-class that represents the canonical IV of a loop region. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Dropped intentionally?
5c5cada
to
9a6b248
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This should be in pretty good shape, still most printing tests needs to be updated.
This now depends on #155301, to be able to track the live-ness of the canonical IV accurartely when estimating register pressure
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Updated, thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Unfotunately yes, after narrowInterleaveGroups it will be 1. I updated the code to check for users and consider both cases (increment by UFxVF and used in branchoncond)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
code above moved into the loop
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not needed in the latest version, removed, thanks!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
added back, thanks
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Code now gone from the patch
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Code now gone from the patch
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, previously we would automically do this for VPCanonicalPHIRecipe.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, currently sign-wrap was never set.
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-backend-aarch64 Author: Florian Hahn (fhahn) ChangesPatch is 208.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/144803.diff 40 Files Affected:
diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
index 490f6391c15a0..defeb80118337 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp
@@ -4891,7 +4891,9 @@ static bool shouldUnrollMultiExitLoop(Loop *L, ScalarEvolution &SE,
// a symbolic expression. Multi-exit loops with small known trip counts will
// likely be unrolled anyway.
const SCEV *BTC = SE.getSymbolicMaxBackedgeTakenCount(L);
- if (isa<SCEVConstant>(BTC) || isa<SCEVCouldNotCompute>(BTC))
+ if ((isa<SCEVConstant>(BTC) &&
+ !isa<SCEVConstant>(SE.getExitCount(L, L->getLoopLatch()))) ||
+ isa<SCEVCouldNotCompute>(BTC))
return false;
// It might not be worth unrolling loops with low max trip counts. Restrict
@@ -5111,6 +5113,7 @@ void AArch64TTIImpl::getUnrollingPreferences(
// Allow slightly more costly trip-count expansion to catch search loops
// with pointer inductions.
UP.SCEVExpansionBudget = 5;
+ UP.Partial = isa<SCEVConstant>(SE.getExitCount(L, L->getLoopLatch()));
return;
}
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index a0f306c12754f..214d1e0e1ae80 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -4147,7 +4147,6 @@ static bool willGenerateVectors(VPlan &Plan, ElementCount VF,
case VPDef::VPScalarIVStepsSC:
case VPDef::VPReplicateSC:
case VPDef::VPInstructionSC:
- case VPDef::VPCanonicalIVPHISC:
case VPDef::VPVectorPointerSC:
case VPDef::VPVectorEndPointerSC:
case VPDef::VPExpandSCEVSC:
@@ -8664,6 +8663,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
m_Specific(Plan->getCanonicalIV()), m_VPValue())) &&
"Did not find the canonical IV increment");
cast<VPRecipeWithIRFlags>(IVInc)->dropPoisonGeneratingFlags();
+ Plan->getCanonicalIV()->dropNUW();
}
// ---------------------------------------------------------------------------
@@ -8727,8 +8727,7 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlanWithVPRecipes(
// latter are added above for masking.
// FIXME: Migrate code relying on the underlying instruction from VPlan0
// to construct recipes below to not use the underlying instruction.
- if (isa<VPCanonicalIVPHIRecipe, VPWidenCanonicalIVRecipe, VPBlendRecipe>(
- &R) ||
+ if (isa<VPWidenCanonicalIVRecipe, VPBlendRecipe>(&R) ||
(isa<VPInstruction>(&R) && !UnderlyingValue))
continue;
@@ -8949,8 +8948,6 @@ VPlanPtr LoopVectorizationPlanner::tryToBuildVPlan(VFRange &Range) {
VPRecipeBuilder RecipeBuilder(*Plan, OrigLoop, TLI, &TTI, Legal, CM, PSE,
Builder, BlockMaskCache, nullptr /*LVer*/);
for (auto &R : Plan->getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
- if (isa<VPCanonicalIVPHIRecipe>(&R))
- continue;
auto *HeaderR = cast<VPHeaderPHIRecipe>(&R);
RecipeBuilder.setRecipe(HeaderR->getUnderlyingInstr(), HeaderR);
}
@@ -9676,8 +9673,6 @@ static void preparePlanForMainVectorLoop(VPlan &MainPlan, VPlan &EpiPlan) {
SmallPtrSet<PHINode *, 2> EpiWidenedPhis;
for (VPRecipeBase &R :
EpiPlan.getVectorLoopRegion()->getEntryBasicBlock()->phis()) {
- if (isa<VPCanonicalIVPHIRecipe>(&R))
- continue;
EpiWidenedPhis.insert(
cast<PHINode>(R.getVPSingleValue()->getUnderlyingValue()));
}
@@ -9766,51 +9761,48 @@ preparePlanForEpilogueVectorLoop(VPlan &Plan, Loop *L,
DenseMap<Value *, Value *> ToFrozen;
// Ensure that the start values for all header phi recipes are updated before
// vectorizing the epilogue loop.
- for (VPRecipeBase &R : Header->phis()) {
- if (auto *IV = dyn_cast<VPCanonicalIVPHIRecipe>(&R)) {
- // When vectorizing the epilogue loop, the canonical induction start
- // value needs to be changed from zero to the value after the main
- // vector loop. Find the resume value created during execution of the main
- // VPlan. It must be the first phi in the loop preheader.
- // FIXME: Improve modeling for canonical IV start values in the epilogue
- // loop.
- using namespace llvm::PatternMatch;
- PHINode *EPResumeVal = &*L->getLoopPreheader()->phis().begin();
- for (Value *Inc : EPResumeVal->incoming_values()) {
- if (match(Inc, m_SpecificInt(0)))
- continue;
- assert(!EPI.VectorTripCount &&
- "Must only have a single non-zero incoming value");
- EPI.VectorTripCount = Inc;
- }
- // If we didn't find a non-zero vector trip count, all incoming values
- // must be zero, which also means the vector trip count is zero. Pick the
- // first zero as vector trip count.
- // TODO: We should not choose VF * UF so the main vector loop is known to
- // be dead.
- if (!EPI.VectorTripCount) {
- assert(
- EPResumeVal->getNumIncomingValues() > 0 &&
- all_of(EPResumeVal->incoming_values(),
- [](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
- "all incoming values must be 0");
- EPI.VectorTripCount = EPResumeVal->getOperand(0);
- }
- VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
- assert(all_of(IV->users(),
- [](const VPUser *U) {
- return isa<VPScalarIVStepsRecipe>(U) ||
- isa<VPDerivedIVRecipe>(U) ||
- cast<VPRecipeBase>(U)->isScalarCast() ||
- cast<VPInstruction>(U)->getOpcode() ==
- Instruction::Add;
- }) &&
- "the canonical IV should only be used by its increment or "
- "ScalarIVSteps when resetting the start value");
- IV->setOperand(0, VPV);
+ auto *IV = Plan.getCanonicalIV();
+ // When vectorizing the epilogue loop, the canonical induction start value
+ // needs to be changed from zero to the value after the main vector loop. Find
+ // the resume value created during execution of the main VPlan.
+ // FIXME: Improve modeling for canonical IV start values in the epilogue loop.
+ using namespace llvm::PatternMatch;
+ PHINode *EPResumeVal = &*L->getLoopPreheader()->phis().begin();
+ for (Value *Inc : EPResumeVal->incoming_values()) {
+ if (match(Inc, m_SpecificInt(0)))
continue;
- }
+ assert(!EPI.VectorTripCount &&
+ "Must only have a single non-zero incoming value");
+ EPI.VectorTripCount = Inc;
+ }
+ // If we didn't find a non-zero vector trip count, all incoming values
+ // must be zero, which also means the vector trip count is zero. Pick the
+ // first zero as vector trip count.
+ // TODO: We should not choose VF * UF so the main vector loop is known to
+ // be dead.
+ if (!EPI.VectorTripCount) {
+ assert(EPResumeVal->getNumIncomingValues() > 0 &&
+ all_of(EPResumeVal->incoming_values(),
+ [](Value *Inc) { return match(Inc, m_SpecificInt(0)); }) &&
+ "all incoming values must be 0");
+ EPI.VectorTripCount = EPResumeVal->getOperand(0);
+ }
+ VPValue *VPV = Plan.getOrAddLiveIn(EPResumeVal);
+ assert(all_of(IV->users(),
+ [](const VPUser *U) {
+ return isa<VPScalarIVStepsRecipe>(U) ||
+ isa<VPDerivedIVRecipe>(U) ||
+ cast<VPRecipeBase>(U)->isScalarCast() ||
+ cast<VPInstruction>(U)->getOpcode() ==
+ Instruction::Add;
+ }) &&
+ "the canonical IV should only be used by its increment or "
+ "ScalarIVSteps when resetting the start value");
+ IV->setOperand(0, VPV);
+ // Ensure that the start values for all header phi recipes are updated before
+ // vectorizing the epilogue loop.
+ for (VPRecipeBase &R : Header->phis()) {
Value *ResumeV = nullptr;
// TODO: Move setting of resume values to prepareToExecute.
if (auto *ReductionPhi = dyn_cast<VPReductionPHIRecipe>(&R)) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index f972efa07eb7e..f51fc5e0f172c 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -771,10 +771,13 @@ static std::pair<VPBlockBase *, VPBlockBase *> cloneFrom(VPBlockBase *Entry) {
VPRegionBlock *VPRegionBlock::clone() {
const auto &[NewEntry, NewExiting] = cloneFrom(getEntry());
- auto *NewRegion = getPlan()->createVPRegionBlock(NewEntry, NewExiting,
- getName(), isReplicator());
+ auto *NewRegion =
+ getPlan()->createVPRegionBlock(NewEntry, NewExiting, getName());
for (VPBlockBase *Block : vp_depth_first_shallow(NewEntry))
Block->setParent(NewRegion);
+
+ if (CanIV)
+ NewRegion->CanIV = CanIV->clone();
return NewRegion;
}
@@ -868,6 +871,11 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
VPSlotTracker &SlotTracker) const {
O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
auto NewIndent = Indent + " ";
+ if (CanIV) {
+ O << '\n';
+ CanIV->print(O, NewIndent, SlotTracker);
+ O << '\n';
+ }
for (auto *BlockBase : vp_depth_first_shallow(Entry)) {
O << '\n';
BlockBase->print(O, NewIndent, SlotTracker);
@@ -880,18 +888,38 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
void VPRegionBlock::dissolveToCFGLoop() {
auto *Header = cast<VPBasicBlock>(getEntry());
- if (auto *CanIV = dyn_cast<VPCanonicalIVPHIRecipe>(&Header->front())) {
- assert(this == getPlan()->getVectorLoopRegion() &&
- "Canonical IV must be in the entry of the top-level loop region");
- auto *ScalarR = VPBuilder(CanIV).createScalarPhi(
- {CanIV->getStartValue(), CanIV->getBackedgeValue()},
- CanIV->getDebugLoc(), "index");
+ auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
+ if (CanIV && CanIV->getNumUsers() > 0) {
+ auto *ExitingTerm = ExitingLatch->getTerminator();
+ VPInstruction *CanIVInc = nullptr;
+ for (VPUser *U : CanIV->users()) {
+ VPValue *Inc;
+ if (!match(U, m_VPInstruction<Instruction::Add>(m_Specific(CanIV),
+ m_VPValue(Inc))))
+ continue;
+ auto *IncCand = cast<VPInstruction>(U);
+ if (Inc != &getPlan()->getVFxUF() &&
+ (IncCand->getNumUsers() != 1 ||
+ !match(*IncCand->user_begin(),
+ m_BranchOnCount(m_Specific(IncCand), m_VPValue()))))
+ continue;
+ assert(!CanIVInc && "Expecting at most one canonical IV increment");
+ CanIVInc = cast<VPInstruction>(U);
+ }
+ if (!CanIVInc) {
+ CanIVInc = VPBuilder(ExitingTerm)
+ .createOverflowingOp(Instruction::Add,
+ {CanIV, &getPlan()->getVFxUF()},
+ {CanIV->hasNoUnsignedWrap(), false},
+ CanIV->getDebugLoc(), "index.next");
+ }
+ auto *ScalarR = VPBuilder(Header, Header->begin())
+ .createScalarPhi({CanIV->getStartValue(), CanIVInc},
+ CanIV->getDebugLoc(), "index");
CanIV->replaceAllUsesWith(ScalarR);
- CanIV->eraseFromParent();
}
VPBlockBase *Preheader = getSinglePredecessor();
- auto *ExitingLatch = cast<VPBasicBlock>(getExiting());
VPBlockBase *Middle = getSingleSuccessor();
VPBlockUtils::disconnectBlocks(Preheader, this);
VPBlockUtils::disconnectBlocks(this, Middle);
@@ -928,7 +956,10 @@ VPlan::~VPlan() {
for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
R.setOperand(I, &DummyValue);
}
+ } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV()) {
+ CanIV->replaceAllUsesWith(&DummyValue);
}
+
delete VPB;
}
for (VPValue *VPV : getLiveIns())
@@ -1212,6 +1243,11 @@ VPlan *VPlan::duplicate() {
// else NewTripCount will be created and inserted into Old2NewVPValues when
// TripCount is cloned. In any case NewPlan->TripCount is updated below.
+ if (auto *LoopRegion = getVectorLoopRegion()) {
+ Old2NewVPValues[LoopRegion->getCanonicalIV()] =
+ NewPlan->getVectorLoopRegion()->getCanonicalIV();
+ }
+
remapOperands(Entry, NewEntry, Old2NewVPValues);
// Initialize remaining fields of cloned VPlan.
@@ -1392,6 +1428,8 @@ void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
/// Returns true if there is a vector loop region and \p VPV is defined in a
/// loop region.
static bool isDefinedInsideLoopRegions(const VPValue *VPV) {
+ if (isa<VPCanonicalIV>(VPV))
+ return true;
const VPRecipeBase *DefR = VPV->getDefiningRecipe();
return DefR && (!DefR->getParent()->getPlan()->getVectorLoopRegion() ||
DefR->getParent()->getEnclosingLoopRegion());
@@ -1501,9 +1539,12 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
- for (const VPBasicBlock *VPBB :
- VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
- assignNames(VPBB);
+ for (const VPBlockBase *VPB : RPOT) {
+ if (auto *VPBB = dyn_cast<VPBasicBlock>(VPB)) {
+ assignNames(VPBB);
+ } else if (auto *CanIV = cast<VPRegionBlock>(VPB)->getCanonicalIV())
+ assignName(CanIV);
+ }
}
void VPSlotTracker::assignNames(const VPBasicBlock *VPBB) {
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 33bcb49b81740..c4ecd474ca848 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -547,7 +547,6 @@ class VPSingleDefRecipe : public VPRecipeBase, public VPValue {
case VPRecipeBase::VPWidenSelectSC:
case VPRecipeBase::VPBlendSC:
case VPRecipeBase::VPPredInstPHISC:
- case VPRecipeBase::VPCanonicalIVPHISC:
case VPRecipeBase::VPActiveLaneMaskPHISC:
case VPRecipeBase::VPFirstOrderRecurrencePHISC:
case VPRecipeBase::VPWidenPHISC:
@@ -1929,12 +1928,6 @@ class VPVectorPointerRecipe : public VPRecipeWithIRFlags,
/// the backedge is the second operand.
///
/// Inductions are modeled using the following sub-classes:
-/// * VPCanonicalIVPHIRecipe: Canonical scalar induction of the vector loop,
-/// starting at a specified value (zero for the main vector loop, the resume
-/// value for the epilogue vector loop) and stepping by 1. The induction
-/// controls exiting of the vector loop by comparing against the vector trip
-/// count. Produces a single scalar PHI for the induction value per
-/// iteration.
/// * VPWidenIntOrFpInductionRecipe: Generates vector values for integer and
/// floating point inductions with arbitrary start and step values. Produces
/// a vector PHI per-part.
@@ -3288,61 +3281,51 @@ class VPExpandSCEVRecipe : public VPSingleDefRecipe {
const SCEV *getSCEV() const { return Expr; }
};
-/// Canonical scalar induction phi of the vector loop. Starting at the specified
+/// Canonical scalar induction of the vector loop. Starting at the specified
/// start value (either 0 or the resume value when vectorizing the epilogue
-/// loop). VPWidenCanonicalIVRecipe represents the vector version of the
-/// canonical induction variable.
-class VPCanonicalIVPHIRecipe : public VPHeaderPHIRecipe {
+/// loop).
+class VPCanonicalIV : public VPValue, public VPUser {
+ bool HasNUW = true;
+ DebugLoc DL;
+
public:
- VPCanonicalIVPHIRecipe(VPValue *StartV, DebugLoc DL)
- : VPHeaderPHIRecipe(VPDef::VPCanonicalIVPHISC, nullptr, StartV, DL) {}
+ VPCanonicalIV(VPValue *StartV, bool HasNUW = true,
+ DebugLoc DL = DebugLoc::getUnknown())
+ : VPValue(VPValue::VPCanonicalIVSC), VPUser(StartV), HasNUW(HasNUW),
+ DL(DL) {}
- ~VPCanonicalIVPHIRecipe() override = default;
+ ~VPCanonicalIV() override = default;
- VPCanonicalIVPHIRecipe *clone() override {
- auto *R = new VPCanonicalIVPHIRecipe(getOperand(0), getDebugLoc());
- R->addOperand(getBackedgeValue());
- return R;
+ VPCanonicalIV *clone() {
+ return new VPCanonicalIV(getOperand(0), HasNUW, DL);
}
- VP_CLASSOF_IMPL(VPDef::VPCanonicalIVPHISC)
+ static inline bool classof(const VPValue *V) {
+ return V->getVPValueID() == VPValue::VPCanonicalIVSC;
+ }
- void execute(VPTransformState &State) override {
- llvm_unreachable("cannot execute this recipe, should be replaced by a "
- "scalar phi recipe");
+ bool onlyFirstLaneUsed(const VPValue *Op) const override {
+ assert(is_contained(operands(), Op) &&
+ "Op must be an operand of the recipe");
+ return true;
}
+ void dropNUW() { HasNUW = false; }
+ bool hasNoUnsignedWrap() const { return HasNUW; }
+
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
/// Print the recipe.
void print(raw_ostream &O, const Twine &Indent,
- VPSlotTracker &SlotTracker) const override;
+ VPSlotTracker &SlotTracker) const;
#endif
+ VPValue *getStartValue() const { return getOperand(0); }
+ DebugLoc getDebugLoc() { return DL; }
+
/// Returns the scalar type of the induction.
Type *getScalarType() const {
return getStartValue()->getLiveInIRValue()->getType();
}
-
- /// Returns true if the recipe only uses the first lane of operand \p Op.
- bool onlyFirstLaneUsed(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
- }
-
- /// Returns true if the recipe only uses the first part of operand \p Op.
- bool onlyFirstPartUsed(const VPValue *Op) const override {
- assert(is_contained(operands(), Op) &&
- "Op must be an operand of the recipe");
- return true;
- }
-
- /// Return the cost of this VPCanonicalIVPHIRecipe.
- InstructionCost computeCost(ElementCount VF,
- VPCostContext &Ctx) const override {
- // For now, match the behavior of the legacy cost model.
- return 0;
- }
};
/// A recipe for generating the active lane mask for the vector loop that is
@@ -3423,14 +3406,13 @@ class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe {
class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe,
public VPUnrollPartAccessor<1> {
public:
- VPWidenCanonicalIVRecipe(VPCanonicalIVPHIRecipe *CanonicalIV)
+ VPWidenCanonicalIVRecipe(VPValue *CanonicalIV)
: VPSingleDefRecipe(VPDef::VPWidenCanonicalIVSC, {CanonicalIV}) {}
~VPWidenCanonicalIVRecipe() override = default;
VPWidenCanonicalIVRecipe *clone() override {
- return new VPWidenCanonicalIVRecipe(
- cast<VPCanonicalIVPHIRecipe>(getOperand(0)));
+ return new VPWidenCanonicalIVRecipe(getOperand(0));
}
VP_CLASSOF_IMPL(VPDef::VPWidenCanonicalIVSC)
@@ -3469,7 +3451,7 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
public:
VPDerivedIVRecipe(const InductionDescriptor &IndDesc, VPValue *Start,
- VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step,
+ VPCanonicalIV *CanonicalIV, VPValue *Step,
const Twine &Name = "")
: VPDerivedIVRecipe(
IndDesc.getKind(),
@@ -3833,26 +3815,39 @@ class LLVM_ABI_FOR_TEST VPRegionBlock : public VPBlockBase {
/// VPRegionBlock.
VPBlockBase *Exiting;
- /// An indicator whether this region is to generate multiple replicated
- /// instances of output IR corresponding to its VPBlockBases.
- bool IsReplicator;
+ /// Canonical IV of the loop region. If nullptr, the region is a replication
+ /// region.
+ VPCanonicalIV *CanIV = nullptr;
/// Use VPlan::createVPRegionBlock to create VPRegionBlocks.
VPRegionBlock(VPBlockBase *Entry, VPBlockBase *Exiting,
- const std::string &Name = "", bool IsReplicator = false)
+ const std::string &Name = "")
+ : VPBlockBase(VPRegionBlockSC, Name), Entry(Entry), Exiting(Exiting) {
+ assert(Entry-...
[truncated]
|
9a6b248
to
bc28475
Compare
Update calculateRegisterUsageForPlan to track live-ness of VPValues instead of recipes. This gives slightly more accurate results for recipes that define multiple values (i.e. VPInterleaveRecipe). When tracking the live-ness of recipes, all VPValues defined by an VPInterleaveRecipe are considered alive until the last use of any of them. When tracking the live-ness of individual VPValues, we can accurately track the individual values until their last use. Note the changes in large-loop-rdx.ll and pr47437.ll. This patch restores the original behavior before introducing VPlan-based liveness tracking.
bc28475
to
e6d1e69
Compare
Closing in favor of #156262 |
No description provided.